{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "mLXw6zd-k3Xd"
   },
   "source": [
    "##Setup\n",
    "\n",
    "You will need to make a copy of this notebook in your Google Drive before you can edit the homework files. You can do so with **File &rarr; Save a copy in Drive**."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "colab": {},
    "colab_type": "code",
    "id": "4HBPnmbIPPyl"
   },
   "outputs": [],
   "source": [
    "#@title mount your Google Drive\n",
    "#@markdown Your work will be stored in a folder called `cs285_f2021` by default to prevent Colab instance timeouts from deleting your edits.\n",
    "\n",
    "import os\n",
    "from google.colab import drive\n",
    "drive.mount('/content/gdrive')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "colab": {},
    "colab_type": "code",
    "id": "OuCfTLJIx5nQ"
   },
   "outputs": [],
   "source": [
    "#@title set up mount symlink\n",
    "\n",
    "DRIVE_PATH = '/content/gdrive/My\\ Drive/cs285_f2021'\n",
    "DRIVE_PYTHON_PATH = DRIVE_PATH.replace('\\\\', '')\n",
    "if not os.path.exists(DRIVE_PYTHON_PATH):\n",
    "  %mkdir $DRIVE_PATH\n",
    "\n",
    "## the space in `My Drive` causes some issues,\n",
    "## make a symlink to avoid this\n",
    "SYM_PATH = '/content/cs285_f2021'\n",
    "if not os.path.exists(SYM_PATH):\n",
    "  !ln -s $DRIVE_PATH $SYM_PATH"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "colab": {},
    "colab_type": "code",
    "id": "XTtWDO-Bkqnn"
   },
   "outputs": [],
   "source": [
    "#@title apt install requirements\n",
    "\n",
    "#@markdown Run each section with Shift+Enter\n",
    "\n",
    "#@markdown Double-click on section headers to show code.\n",
    "\n",
    "!apt update \n",
    "!apt install -y --no-install-recommends \\\n",
    "        build-essential \\\n",
    "        curl \\\n",
    "        git \\\n",
    "        gnupg2 \\\n",
    "        make \\\n",
    "        cmake \\\n",
    "        ffmpeg \\\n",
    "        swig \\\n",
    "        libz-dev \\\n",
    "        unzip \\\n",
    "        zlib1g-dev \\\n",
    "        libglfw3 \\\n",
    "        libglfw3-dev \\\n",
    "        libxrandr2 \\\n",
    "        libxinerama-dev \\\n",
    "        libxi6 \\\n",
    "        libxcursor-dev \\\n",
    "        libgl1-mesa-dev \\\n",
    "        libgl1-mesa-glx \\\n",
    "        libglew-dev \\\n",
    "        libosmesa6-dev \\\n",
    "        lsb-release \\\n",
    "        ack-grep \\\n",
    "        patchelf \\\n",
    "        wget \\\n",
    "        xpra \\\n",
    "        xserver-xorg-dev \\\n",
    "        xvfb \\\n",
    "        python-opengl \\\n",
    "        ffmpeg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "colab": {},
    "colab_type": "code",
    "id": "QeDMsMOXUAkN"
   },
   "outputs": [],
   "source": [
    "#@title download mujoco\n",
    "\n",
    "MJC_PATH = '{}/mujoco'.format(SYM_PATH)\n",
    "%mkdir $MJC_PATH\n",
    "%cd $MJC_PATH\n",
    "!wget -q https://www.roboti.us/download/mujoco200_linux.zip\n",
    "!unzip -q mujoco200_linux.zip\n",
    "%mv mujoco200_linux mujoco200\n",
    "%rm mujoco200_linux.zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "colab": {},
    "colab_type": "code",
    "id": "wTsf6RYGk_pz"
   },
   "outputs": [],
   "source": [
    "#@title update mujoco paths\n",
    "\n",
    "import os\n",
    "\n",
    "os.environ['LD_LIBRARY_PATH'] += ':{}/mujoco200/bin'.format(MJC_PATH)\n",
    "os.environ['MUJOCO_PY_MUJOCO_PATH'] = '{}/mujoco200'.format(MJC_PATH)\n",
    "os.environ['MUJOCO_PY_MJKEY_PATH'] = '{}/mjkey.txt'.format(MJC_PATH)\n",
    "\n",
    "## installation on colab does not find *.so files\n",
    "## in LD_LIBRARY_PATH, copy over manually instead\n",
    "!cp $MJC_PATH/mujoco200/bin/*.so /usr/lib/x86_64-linux-gnu/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "JI_nuhTulBvU"
   },
   "source": [
    "Copy over `mjkey.txt` into `/content/cs285_f2021/mujoco` before this step"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "colab": {},
    "colab_type": "code",
    "id": "amF0DgEyklFl"
   },
   "outputs": [],
   "source": [
    "#@title clone and install mujoco-py\n",
    "\n",
    "%cd $MJC_PATH\n",
    "!git clone https://github.com/openai/mujoco-py.git\n",
    "%cd mujoco-py\n",
    "%pip install -e .\n",
    "\n",
    "## cythonize at the first import\n",
    "import mujoco_py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "colab": {},
    "colab_type": "code",
    "id": "X_aXQac0f3pr"
   },
   "outputs": [],
   "source": [
    "#@title clone homework repo\n",
    "\n",
    "%cd $SYM_PATH\n",
    "!git clone https://github.com/berkeleydeeprlcourse/homework_fall2021.git\n",
    "%cd homework_fall2021/hw1\n",
    "%pip install -r requirements_colab.txt\n",
    "%pip install -e ."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "colab": {},
    "colab_type": "code",
    "id": "8y_M1tGxmGhT"
   },
   "outputs": [],
   "source": [
    "#@title set up virtual display\n",
    "\n",
    "from pyvirtualdisplay import Display\n",
    "\n",
    "display = Display(visible=0, size=(1400, 900))\n",
    "display.start()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 438
    },
    "colab_type": "code",
    "id": "y7cywOEgo4a8",
    "outputId": "c91293e2-0424-4427-b57e-0e12653c991a"
   },
   "outputs": [],
   "source": [
    "#@title test virtual display\n",
    "\n",
    "#@markdown If you see a video of a four-legged ant fumbling about, setup is complete!\n",
    "\n",
    "import gym\n",
    "from cs285.infrastructure.colab_utils import (\n",
    "    wrap_env,\n",
    "    show_video\n",
    ") \n",
    "\n",
    "env = wrap_env(gym.make(\"Ant-v2\"))\n",
    "\n",
    "observation = env.reset()\n",
    "for i in range(100):\n",
    "    env.render(mode='rgb_array')\n",
    "    obs, rew, term, _ = env.step(env.action_space.sample() ) \n",
    "    if term:\n",
    "      break;\n",
    "            \n",
    "env.close()\n",
    "print('Loading video...')\n",
    "show_video()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "eQx7oDGeeKWj"
   },
   "source": [
    "## Editing Code\n",
    "\n",
    "To edit code, click the folder icon on the left menu. Navigate to the corresponding file (`cs285_f2021/...`). Double click a file to open an editor. There is a timeout of about ~12 hours with Colab while it is active (and less if you close your browser window). We sync your edits to Google Drive so that you won't lose your work in the event of an instance timeout, but you will need to re-mount your Google Drive and re-install packages with every new instance."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "UunygyDXrx7k"
   },
   "source": [
    "## Run Behavior Cloning (Problem 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "colab": {},
    "colab_type": "code",
    "id": "enh5ZMHftEO7"
   },
   "outputs": [],
   "source": [
    "#@title imports\n",
    "\n",
    "import os\n",
    "import time\n",
    "import numpy as np\n",
    "\n",
    "from cs285.infrastructure.rl_trainer import RL_Trainer\n",
    "from cs285.agents.bc_agent import BCAgent\n",
    "from cs285.policies.loaded_gaussian_policy import LoadedGaussianPolicy\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "colab": {},
    "colab_type": "code",
    "id": "imnAkQ6jryL7"
   },
   "outputs": [],
   "source": [
    "#@title runtime arguments\n",
    "\n",
    "class Args:\n",
    "\n",
    "  def __getitem__(self, key):\n",
    "    return getattr(self, key)\n",
    "\n",
    "  def __setitem__(self, key, val):\n",
    "    setattr(self, key, val)\n",
    "\n",
    "  #@markdown expert data\n",
    "  expert_policy_file = 'cs285/policies/experts/Ant.pkl' #@param\n",
    "  expert_data = 'cs285/expert_data/expert_data_Ant-v2.pkl' #@param\n",
    "  env_name = 'Ant-v2' #@param ['Ant-v2', 'Humanoid-v2', 'Walker2d-v2', 'HalfCheetah-v2', 'Hopper-v2']\n",
    "  exp_name = 'test_bc_ant' #@param\n",
    "  do_dagger = False #@param {type: \"boolean\"}\n",
    "  ep_len = 1000 #@param {type: \"integer\"}\n",
    "  save_params = False #@param {type: \"boolean\"}\n",
    "\n",
    "  num_agent_train_steps_per_iter = 1000 #@param {type: \"integer\"})\n",
    "  n_iter = 1 #@param {type: \"integer\"})\n",
    "\n",
    "  #@markdown batches & buffers\n",
    "  batch_size = 1000 #@param {type: \"integer\"})\n",
    "  eval_batch_size = 1000 #@param {type: \"integer\"}\n",
    "  train_batch_size = 100 #@param {type: \"integer\"}\n",
    "  max_replay_buffer_size = 1000000 #@param {type: \"integer\"}\n",
    "\n",
    "  #@markdown network\n",
    "  n_layers = 2 #@param {type: \"integer\"}\n",
    "  size = 64 #@param {type: \"integer\"}\n",
    "  learning_rate = 5e-3 #@param {type: \"number\"}\n",
    "\n",
    "  #@markdown logging\n",
    "  video_log_freq = 5 #@param {type: \"integer\"}\n",
    "  scalar_log_freq = 1 #@param {type: \"integer\"}\n",
    "\n",
    "  #@markdown gpu & run-time settings\n",
    "  no_gpu = False #@param {type: \"boolean\"}\n",
    "  which_gpu = 0 #@param {type: \"integer\"}\n",
    "  seed = 1 #@param {type: \"integer\"}\n",
    "\n",
    "args = Args()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "colab": {},
    "colab_type": "code",
    "id": "fLnU1evmss4I"
   },
   "outputs": [],
   "source": [
    "#@title define `BC_Trainer`\n",
    "class BC_Trainer(object):\n",
    "\n",
    "    def __init__(self, params):\n",
    "        #######################\n",
    "        ## AGENT PARAMS\n",
    "        #######################\n",
    "\n",
    "        agent_params = {\n",
    "            'n_layers': params['n_layers'],\n",
    "            'size': params['size'],\n",
    "            'learning_rate': params['learning_rate'],\n",
    "            'max_replay_buffer_size': params['max_replay_buffer_size'],\n",
    "            }\n",
    "\n",
    "        self.params = params\n",
    "        self.params['agent_class'] = BCAgent ## TODO: look in here and implement this\n",
    "        self.params['agent_params'] = agent_params\n",
    "\n",
    "        ################\n",
    "        ## RL TRAINER\n",
    "        ################\n",
    "\n",
    "        self.rl_trainer = RL_Trainer(self.params) ## TODO: look in here and implement this\n",
    "\n",
    "        #######################\n",
    "        ## LOAD EXPERT POLICY\n",
    "        #######################\n",
    "\n",
    "        print('Loading expert policy from...', self.params['expert_policy_file'])\n",
    "        self.loaded_expert_policy = LoadedGaussianPolicy(self.params['expert_policy_file'])\n",
    "        print('Done restoring expert policy...')\n",
    "\n",
    "    def run_training_loop(self):\n",
    "\n",
    "        self.rl_trainer.run_training_loop(\n",
    "            n_iter=self.params['n_iter'],\n",
    "            initial_expertdata=self.params['expert_data'],\n",
    "            collect_policy=self.rl_trainer.agent.actor,\n",
    "            eval_policy=self.rl_trainer.agent.actor,\n",
    "            relabel_with_expert=self.params['do_dagger'],\n",
    "            expert_policy=self.loaded_expert_policy,\n",
    "        )\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "colab": {},
    "colab_type": "code",
    "id": "7UkzHBfxsxH8"
   },
   "outputs": [],
   "source": [
    "#@title create directory for logging\n",
    "\n",
    "if args.do_dagger:\n",
    "    logdir_prefix = 'q2_'  # The autograder uses the prefix `q2_`\n",
    "    assert args.n_iter>1, ('DAgger needs more than 1 iteration (n_iter>1) of training, to iteratively query the expert and train (after 1st warmstarting from behavior cloning).')\n",
    "else:\n",
    "    logdir_prefix = 'q1_'  # The autograder uses the prefix `q1_`\n",
    "    assert args.n_iter==1, ('Vanilla behavior cloning collects expert data just once (n_iter=1)')\n",
    "\n",
    "data_path ='/content/cs285_f2021/hw1/data'\n",
    "if not (os.path.exists(data_path)):\n",
    "    os.makedirs(data_path)\n",
    "logdir = logdir_prefix + args.exp_name + '_' + args.env_name + \\\n",
    "         '_' + time.strftime(\"%d-%m-%Y_%H-%M-%S\")\n",
    "logdir = os.path.join(data_path, logdir)\n",
    "args['logdir'] = logdir\n",
    "if not(os.path.exists(logdir)):\n",
    "    os.makedirs(logdir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "_qQb789_syt0"
   },
   "outputs": [],
   "source": [
    "## run training\n",
    "print(args.logdir)\n",
    "trainer = BC_Trainer(args)\n",
    "trainer.run_training_loop()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "75M0MlR5tUIb"
   },
   "outputs": [],
   "source": [
    "#@markdown You can visualize your runs with tensorboard from within the notebook\n",
    "\n",
    "%load_ext tensorboard\n",
    "%tensorboard --logdir /content/cs285_f2021/hw1/data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "ff9onuUPfPEa"
   },
   "source": [
    "## Running DAgger (Problem 2)\n",
    "Modify the settings above:\n",
    "1. check the `do_dagger` box\n",
    "2. set `n_iters` to `10`\n",
    "and then rerun the code."
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "run_hw1.ipynb",
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
